Skip to content

18 io流处理

有时候你需要把字符串当作文件来操作——比如测试文件读写逻辑、在内存中拼接CSV数据、处理网络响应……这时候StringIO和BytesIO就派上用场了,它们是内存中的"虚拟文件"。

一、StringIO:内存文本流

1.1 基本用法

python
from io import StringIO

# 创建StringIO对象
buffer = StringIO()

# 写入数据
buffer.write("Hello, ")
buffer.write("World!\n")
buffer.write("第二行\n")

# 获取内容
content = buffer.getvalue()
print(content)
# Hello, World!
# 第二行

# 关闭
buffer.close()

1.2 带初始值

python
from io import StringIO

# 创建时就带内容
buffer = StringIO("初始内容\n")

# 读取
print(buffer.read())  # 初始内容

# 回到开头
buffer.seek(0)
print(buffer.readline())  # 初始内容

1.3 用with语句

python
from io import StringIO

with StringIO() as buffer:
    buffer.write("Hello\n")
    buffer.write("World\n")
    content = buffer.getvalue()
    print(content)

二、BytesIO:内存二进制流

2.1 基本用法

python
from io import BytesIO

# 创建BytesIO对象
buffer = BytesIO()

# 写入二进制数据
buffer.write(b"Hello, ")
buffer.write(b"World!")

# 获取内容
content = buffer.getvalue()
print(content)  # b'Hello, World!'

buffer.close()

2.2 带初始值

python
from io import BytesIO

buffer = BytesIO(b"binary data")
print(buffer.read())  # b'binary data'

三、文件接口

StringIO和BytesIO都实现了完整的文件接口:

3.1 读取方法

python
from io import StringIO

buffer = StringIO("line1\nline2\nline3\n")

# read():读取全部
buffer.seek(0)
print(buffer.read())  # line1\nline2\nline3\n

# readline():读取一行
buffer.seek(0)
print(buffer.readline())  # line1\n

# readlines():读取所有行
buffer.seek(0)
print(buffer.readlines())  # ['line1\n', 'line2\n', 'line3\n']

# 迭代
buffer.seek(0)
for line in buffer:
    print(line.strip())

3.2 写入方法

python
from io import StringIO

buffer = StringIO()

# write():写入字符串
buffer.write("Hello")

# writelines():写入多个字符串
buffer.writelines([" World", "!"])

print(buffer.getvalue())  # Hello World!

3.3 定位方法

python
from io import StringIO

buffer = StringIO("Hello World")

# tell():返回当前位置
print(buffer.tell())  # 0

# seek():移动位置
buffer.seek(6)
print(buffer.read())  # World

# seek(0):回到开头
buffer.seek(0)
print(buffer.read())  # Hello World

四、实用场景

4.1 测试文件操作

python
from io import StringIO

def process_file(file_obj):
    """Return the lines of *file_obj*, stripped and upper-cased.

    Args:
        file_obj: Any object exposing ``readlines()`` that yields strings,
            e.g. an open text file or an ``io.StringIO``.

    Returns:
        A list with one entry per line read; each entry has its surrounding
        whitespace (including the trailing newline) removed and is converted
        to upper case.
    """
    normalized = []
    for raw_line in file_obj.readlines():
        normalized.append(raw_line.strip().upper())
    return normalized

# 测试时不需要真实文件
test_data = StringIO("hello\nworld\npython\n")
result = process_file(test_data)
print(result)  # ['HELLO', 'WORLD', 'PYTHON']

4.2 内存中生成CSV

python
from io import StringIO
import csv

# Rows to serialize: header first, then one list per record.
rows = [
    ["name", "age", "city"],
    ["大志", 28, "北京"],
    ["小明", 25, "上海"],
]

# csv.writer only needs an object with write(), so an in-memory text
# buffer can stand in for a real file.
buffer = StringIO()
writer = csv.writer(buffer)
writer.writerows(rows)

# getvalue() returns everything written so far, regardless of position.
csv_content = buffer.getvalue()
print(csv_content)

4.3 捕获print输出

python
from io import StringIO
import sys

# Redirect stdout into an in-memory buffer, remembering the real stream
# so it can be put back afterwards.
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
    print("这行不会显示在终端")
    print("这行也不会")

    # Read the captured text while the buffer is still installed as stdout.
    output = sys.stdout.getvalue()
finally:
    # Restore the real stdout even if the code above raises; otherwise every
    # later print() in the process would keep going into the buffer.
    sys.stdout = old_stdout

print(f"捕获到: {output}")

更优雅的方式:

python
from io import StringIO
from contextlib import redirect_stdout

# One `with` statement manages both resources: the buffer is created first,
# then stdout is pointed at it. On exit stdout is restored before the buffer
# is closed, so the captured text must be read inside the block.
with StringIO() as buffer, redirect_stdout(buffer):
    for message in ("捕获这行", "也捕获这行"):
        print(message)
    output = buffer.getvalue()

print(f"捕获到: {output}")

4.4 处理网络响应

python
from io import BytesIO
import json

# Simulated network response body. A bytes literal may only contain ASCII
# characters, so the non-ASCII JSON text is written as a str and encoded
# to UTF-8 explicitly (b'...大志...' is a SyntaxError).
response_data = '{"name": "大志", "age": 28}'.encode("utf-8")
buffer = BytesIO(response_data)

# Treat the payload like a binary file: read it all, then parse.
# json.loads accepts UTF-8 encoded bytes directly.
data = json.loads(buffer.read())
print(data)  # {'name': '大志', 'age': 28}

4.5 临时文件替代

python
from io import BytesIO

def compress_data(data):
    """Gzip-compress *data* entirely in memory.

    Args:
        data: The bytes to compress.

    Returns:
        The gzip-formatted compressed bytes.
    """
    import gzip

    sink = BytesIO()
    # Passing a file object to gzip.open wraps it in a GzipFile; the gzip
    # trailer is only written when the wrapper is closed, so the result is
    # read from the sink after the `with` block ends.
    with gzip.open(sink, "wb") as gz_stream:
        gz_stream.write(data)
    return sink.getvalue()

def decompress_data(data):
    """Decompress gzip-formatted *data* entirely in memory.

    Args:
        data: Gzip-compressed bytes.

    Returns:
        The decompressed bytes.
    """
    import gzip

    # Wrap the compressed bytes in a file-like object so gzip can read
    # them as if they came from a file on disk.
    source = BytesIO(data)
    with gzip.open(source, "rb") as gz_stream:
        restored = gz_stream.read()
    return restored

original = b"Hello World " * 1000
compressed = compress_data(original)
decompressed = decompress_data(compressed)

print(f"原始: {len(original)}字节")
print(f"压缩: {len(compressed)}字节")
print(f"解压后一致: {original == decompressed}")

五、编码处理

5.1 StringIO与编码

python
from io import StringIO

# StringIO处理的是字符串,不需要指定编码
buffer = StringIO()
buffer.write("你好世界")
print(buffer.getvalue())  # 你好世界

5.2 BytesIO与编码

python
from io import BytesIO

# BytesIO处理的是字节
buffer = BytesIO()

# 字符串需要先编码
text = "你好世界"
buffer.write(text.encode("utf-8"))

# 读取后需要解码
content = buffer.getvalue().decode("utf-8")
print(content)  # 你好世界

5.3 文本与二进制转换

python
from io import StringIO, BytesIO

# 文本 → 二进制
text_buffer = StringIO("Hello 你好")
text = text_buffer.getvalue()
bytes_buffer = BytesIO(text.encode("utf-8"))

# 二进制 → 文本
bytes_data = bytes_buffer.getvalue()
text_buffer = StringIO(bytes_data.decode("utf-8"))
print(text_buffer.getvalue())  # Hello 你好

六、性能考虑

6.1 大量拼接

python
from io import StringIO

# 不推荐:字符串拼接(每次创建新对象)
result = ""
for i in range(10000):
    result += str(i)

# 推荐:用StringIO
buffer = StringIO()
for i in range(10000):
    buffer.write(str(i))
result = buffer.getvalue()

# 更推荐:用join
result = "".join(str(i) for i in range(10000))

6.2 内存使用

python
from io import BytesIO

# BytesIO会把所有数据存在内存中
# 大文件应该用真实文件或流式处理
buffer = BytesIO()
for i in range(1000000):
    buffer.write(f"line {i}\n".encode())
# 这会占用大量内存

七、与其他模块配合

7.1 与csv模块

python
from io import StringIO
import csv

# Write CSV rows into an in-memory text buffer.
output = StringIO()
writer = csv.writer(output)
writer.writerow(["name", "age"])
writer.writerow(["大志", 28])

csv_string = output.getvalue()

# Parse the CSV text back. The buffer is named `source` rather than `input`
# so the builtin input() function is not shadowed for the rest of the module.
source = StringIO(csv_string)
reader = csv.reader(source)
for row in reader:
    print(row)

7.2 与json模块

python
from io import StringIO
import json

# 在内存中读写JSON
buffer = StringIO()
json.dump({"name": "大志", "age": 28}, buffer, ensure_ascii=False)

json_string = buffer.getvalue()

# 解析JSON
data = json.loads(json_string)
print(data)

7.3 与gzip模块

python
from io import BytesIO
import gzip

# Compress in memory.
text = "Hello World " * 1000
# Encode once and keep the bytes: the compression ratio must compare byte
# counts, and len(text) counts characters, which differs from the encoded
# size as soon as the text contains non-ASCII characters.
raw = text.encode()
buffer = BytesIO()
with gzip.GzipFile(fileobj=buffer, mode='wb') as f:
    f.write(raw)

# Read the result only after the GzipFile is closed, so the gzip trailer
# has been flushed into the buffer.
compressed = buffer.getvalue()
print(f"压缩率: {len(compressed) / len(raw):.2%}")

八、总结

io模块的核心类:

| 类 | 用途 |
| --- | --- |
| StringIO | 内存文本流 |
| BytesIO | 内存二进制流 |

两者都支持:

  • read() / readline() / readlines()
  • write() / writelines()
  • seek() / tell()
  • getvalue() 获取全部内容
  • with 语句

使用场景:

  • 测试时模拟文件
  • 内存中生成数据
  • 捕获输出
  • 处理网络响应
  • 替代临时文件

StringIO处理字符串,BytesIO处理字节,根据数据类型选择。